# Clear the workspace: remove every object that ls() reports for the current
# environment (by default ls() skips names that start with a dot), then run
# the garbage collector.
# NOTE(review): this does not detach packages or reset options, so it is not
# a full session reset -- confirm whether starting a fresh R session is what
# is actually intended here.
rm(list = ls())
gc()

library(lubridate)
library(foreign)
library(ggplot2)
library(ggthemes)
library(estimatr)
library(naniar)
library(tidyverse)


# Read the serialized inputs ----
# `imputed` is the object later handed to mice::complete(); `long` is the
# table that is joined and exported at the end of this script.
imputed <- readRDS("data/imputed_data.rds")
long <- readRDS("data/imputed_long.rds")
# `df` is not referenced again in this file -- presumably it is consumed by
# one of the sourced scripts; TODO confirm.
df <- readRDS("data/svy.rds")

# Evaluate the PCA helper script (its contents are not visible from here).
source("functions/pca_fun.R")

# Sanity check: confirm that no missing values remain in any completed data
# set (i.e. that the imputation worked as intended).
# The loop runs over every imputation stored in the mids object (`imputed$m`)
# rather than a hard-coded 1:10, so it neither errors when fewer than 10
# imputations exist nor silently skips some when there are more.
# `i`, `imputed_data1` and `p_missing_long` are deliberately left in the
# calling environment, exactly as before.
for (i in seq_len(imputed$m)) {
  imputed_data1 <- mice::complete(imputed, i)

  # Share of missing values per column of the i-th completed data set.
  p_missing_long <- vapply(
    imputed_data1,
    function(x) sum(is.na(x)),
    numeric(1)
  ) / nrow(imputed_data1)

  # Print only the columns that still contain missing values, worst first;
  # an empty vector means this completed data set has none.
  print(sort(p_missing_long[p_missing_long > 0], decreasing = TRUE))
}

# Evaluates the `response_num` column of `long` without assigning it. As a
# bare top-level expression its value is auto-printed when the script is run
# interactively or through Rscript.
# NOTE(review): looks like a leftover inspection step -- the value is not
# used anywhere below in this file; confirm before removing.
long$response_num

# Cleaning, PC indices and additive indices ----
# All of that work happens inside the sourced script. `long_additive` is not
# defined in this file, so it is presumably created there -- TODO confirm.
source("source_cleaning_pca.R")

# Attach the additive indices to the long data. No `by` is supplied, so the
# join keys are whatever column names the two tables have in common.
long_final <- left_join(long, long_additive)

# Write the merged table to disk.
saveRDS(long_final, "data/baseline_clean_mi.rds")
